* Set up log
* Change to the project root stored in the global macro ohie. The path is
* wrapped in double quotes so that directories containing spaces work.
cd "$ohie"
* Close any log left open by an earlier run (errors are ignored if none is open)
cap log close
* Store today's date as YYMMDD in the global sysdate and use it in the log name
global sysdate: disp %tdYYNNDD  date("`c(current_date)'", "DMY")
qui log using 	"./logs/brfss_data_setup_$sysdate.log", replace

* Set up timer
* Timer 1 is reset and started here; it is read at the end of the script
timer clear 1
timer on 1 

/*----------------------------------------------------------------------*/
/* PROGRAM: brfss_data_setup.do						*/
/*									*/
/* PURPOSE:								*/
/* [*]	This code creates the analytic data set for the BRFSS data to	*/
/*	use for extrapolation. We only use the Massachusetts data for	*/
/*	extrapolation. 							*/	
/*									*/
/* OUTPUT:								*/
/* [*]	final/brfss.dta: This is the analytic data set for the		*/
/*	BRFSS data and will be used as input data set for the		*/
/*	extrapolation exercises.					*/
/*									*/
/*----------------------------------------------------------------------*/

* Session settings
* - never pause output for --more-- (stored permanently)
* - new variables default to double precision
* - start with an empty dataset in memory
set more off, permanently
set type double
clear
		

*********************************************
* IMPORT RAW .XPT FILES   *
*********************************************

* Convert each yearly SAS transport file (2004 through 2009) to a Stata
* data set. The two-digit year suffix selects both the input and output file.
* The clear option lets the import replace whatever is currently in memory,
* so this section can be rerun on its own without an r(4) error.
foreach yy in 04 05 06 07 08 09 {
	* 20`yy' data
	import sasxport5 "$dofiles/CDBRFS`yy'.XPT", clear
	save "$final/brfss`yy'.dta", replace
}

*********************************************
* STACK TOGETHER ALL RAW DATA      *
*********************************************   

* Start from the 2004 file and append the remaining years in order
use "$final/brfss04.dta", clear
foreach yy in 05 06 07 08 09 {
	append using "$final/brfss`yy'.dta"
}

* Shrink storage types and save the stacked data set
compress
save "$final/brfss_temp.dta", replace

* Remove the year-by-year files now that they have been combined
foreach yy in 04 05 06 07 08 09 {
	erase "$final/brfss`yy'.dta"
}
	
*********************************************
* MA DATA
*********************************************

* Post-reform flag
* "After the reform" means Q3:2007 onwards (July 2007 inclusive). The raw
* iyear and imonth variables are compared as strings here.
gen after = (iyear=="2007" & inlist(imonth,"07","08","09","10","11","12")) ///
	| inlist(iyear,"2008","2009","2010")

* Massachusetts flag
* Massachusetts is identified by FIPS code 25 in the raw variable _state
gen MA = (_state==25)

* Any-insurance flag
* Built from the raw variable hlthplan: 1 means covered, 2 means not covered.
* Every other value is left missing.
gen any_hlthplan = cond(hlthplan==1, 1, cond(hlthplan==2, 0, .))

* Numeric copies of the interview year and month
* force turns any non-numeric string into a missing value
destring iyear imonth, force generate(year month)


* Sample restriction: Massachusetts residents aged 21 through 64 (inclusive)
* NOTE(review): the original note motivates the lower bound by the age range
* of the OHIE sample; confirm that 21 is the intended cutoff.
keep if inrange(age,21,64) & MA==1

* Self-reported health
* Copy genhlth, setting codes 7 through 9 to missing
g selfhealth = genhlth if !inrange(genhlth,7,9)
* Indicators for a rating of 4 or above and of 5 or above; they stay missing
* when selfhealth is missing
g selfhealth_fair = (selfhealth >= 4) if selfhealth < .
g selfhealth_poor = (selfhealth >= 5) if selfhealth < .
	
* Instrument Z
* The extrapolation code requires the instrument to be named Z. It is the
* product of the Massachusetts flag and the post-reform flag.
gen byte Z = MA * after

* Endogenous variable D
* In BRFSS, D means the individual had any insurance. The BRFSS codebook
* question reads: "Do you have any kind of health care coverage, including
* health insurance, prepaid plans such as HMOs, or government plans such as
* Medicare?". This differs from the OHIE data, where D is defined as having
* any Medicaid insurance.
gen byte D = any_hlthplan

* Covariates

* Gender: 1 when sex equals 2, 0 otherwise
gen byte female = (sex==2)

* English: 1 when qstlang equals 1, 0 otherwise
gen byte english = (qstlang==1)

* Weights
* The raw weight variable "_finalwt" is renamed to w. The extrapolation code
* relies on frequency weights stored under the name "w". In Oregon everyone
* has weight = 1, but for BRFSS the weights are essential.
rename _finalwt w

* Keep only the analysis variables, in this column order, and shrink storage
local analysis_vars "Z D female age english selfhealth* w iyear imonth year month"
keep  `analysis_vars'
order `analysis_vars'
compress
	
* Create interaction terms of Xs (common covariates)
* For the subsequent extrapolation codes to work, all common covariates need
* to begin with "_X".

* List all covariates (both binary and continuous). Each one enters xi as a
* categorical (i.) term.
local covars "female english age"
local ncov : word count `covars'

* Build every unordered pair of covariates as an xi interaction term:
* female-english, female-age, english-age
local pairs ""
forvalues a = 1/`ncov' {
	local first : word `a' of `covars'
	* The inner loop starts one past a, so each pair appears once; it runs
	* zero times when a is the last covariate
	forvalues b = `=`a'+1'/`ncov' {
		local second : word `b' of `covars'
		local pairs "`pairs' i.`first'*i.`second'"
	}
}

* Construct interactions using xi (the new variables carry the prefix _I)
xi `pairs'

* Swap the _I prefix for _X so that "ds _X*" returns the common covariates.
* Built-in rename group syntax is used, so no user-written command has to be
* installed.
rename _I* _X*

* Create interaction terms with Z
* In order for the subsequent extrapolation codes to work, all interaction
* terms need to begin with "_Z". The varlist is expanded before the loop
* starts, so the new _Z variables are not picked up by it.
foreach cov of varlist _X* {
	gen _Z`cov' = `cov'*Z
}

* Final set up

* Person ID: the current row number, stored as a string
gen person_id = _n
tostring person_id, replace

* Keep only observations with a non-missing D
* We do this to be consistent with "The Impact of Health Care Reform on
* Hospital and Preventive Care: Evidence from Massachusetts
* (with Jonathan T. Kolstad)"
keep if D != .

* Write the analytic data set to both output locations
compress
foreach outdir in "$final" "$final_analytic" {
	save "`outdir'/brfss.dta", replace
}

* Remove the stacked temporary file
erase "$final/brfss_temp.dta"

* Stop timer 1 and report the elapsed time in hours
timer off 1
timer list 1
local elapsed = `r(t1)'/3600
di "Computing time is `elapsed' hours"

qui log close
